LLMultFFT(const SLong& x, const SLong& y, SLong& z, uint N ), FFTWorkArea class


/*  slmllfft.cpp  by K.Tsuru  */
// function ID = 217  DRADIX, BRADIX
/**************************************************************
SLong class
See also "sfft.h".
Provides a fast Fourie transform(FFT) multiplication.
z = x*y
It uses real discrete transform(RDFT).
[merits]
The work area becomes half.

Version 2.10
 Ooura's program is introduced.
rdft version is introduced
 July, 2001
****************************************************************/
#ifndef SN_H
#include "sn.h"
#endif

static uint fftSize = 0;
static FFTWorkArea *F;
static NCBlock<fftType> Wx, Wy; //For x and y.

void rdft(int n, int isgn, fftType *a, int *ip, fftType *w);

/*************** member functions of FFTWorkArea *************************************/
/*
struct FFTWorkArea{
    int size, ipsz;
    int* ip;    // for bit reverse table
    double* w;  // for sine/cosine table
//constructor
    FFTWorkArea():size(0),ipsz(0),ip(NULL),w(NULL){}
    void Allocate(int N);   // 217
    void MemFree();     // 217
    ~FFTWorkArea(){}
};
*/
#if USES_SIN_COS_TABLE
    uint FFTsinSize(){  // table size of sine.
        if(fftSize==0) return 0;
        return (uint)FFTSineTableSize();
    }
#else
    uint FFTsinSize(){  // table size of sine.
        if(fftSize==0) return 0;
        int i, s = 0;
        for(i=0;i<=FFT_MAX_SIZE_BITS;i++) s += F[i].size;
        return (uint)s;
    }
#endif

void FFTWorkArea::Allocate(int N){ // N is FFT size which is power of 2.
    ipsz = 2+(1<<(int)(log(N/2+0.5)/log(2.0))/2);
    ip = new int[ipsz+2];
    ip[0]=0;
    size=N/2+2;
    w = new fftType[size];
}

void FFTWorkArea::MemFree(){
    delete[] ip;    delete[] w;     ip=NULL;        w=NULL;
    size = ipsz = 0;
}
/*************************************************************************************/
uint FFTSize(){ return fftSize; }
uint FFTWorkSize(){ // size of work area
    if(fftSize==0) return 0;
    return FFTsinSize()+Wx.size()+Wy.size();
}

// in the case x != y
static void rdftProduct(const fType* xv, uint hx, const fType* yv, uint hy,
    fftType* X, fftType* Y, uint N, int *ip, fftType *w){
// The memory of *X and *Y must be prepared in call routine.
    uint i, i2;
// step1 initialization
    for(i = 0; i <= hx ; i++) X[i] = xv[i];
    while(i<N){  X[i] = 0.0;     i++; }
    for(i = 0; i <= hy ; i++) Y[i] = yv[i];
    while(i<N){  Y[i] = 0.0;     i++; }
// step 2 Fourie transform
    rdft(N, 1, X, ip, w);
    rdft(N, 1, Y, ip, w);
// step 3 calculation of products
    fftType xr, xi, yr, yi;
    X[0] = X[0]*Y[0];
    X[1] = X[1]*Y[1];
    for(i = 1; i < N/2; i++){
        i2 = 2*i;
        xr = X[i2];
        xi = X[i2+1];
        yr = Y[i2];
        yi = Y[i2+1];
        X[i2] = xr*yr - xi*yi;
        X[i2+1] = xr*yi + xi*yr;
    }
}
// in the case x == y
static void rdftSquare(const fType* xv, uint hx, fftType* X, uint N, int *ip, fftType *w){
    uint i, i2;

// step1 initialization
    for(i = 0; i <= hx ; i++) X[i] = xv[i];
    while(i<N){  X[i] = 0.0;     i++; }
// step 2 Fourie transform
    rdft(N, 1, X, ip, w);
// step 3 calculation of products
    fftType xr, xi;
    X[0] = X[0]*X[0];
    X[1] = X[1]*X[1];
    for(i = 1; i < N/2; i++){
        i2 = 2*i;
        xr = X[i2];
        xi = X[i2+1];
        X[i2] = xr*xr - xi*xi;
        X[i2+1] = 2.0*xr*xi;
    }
}
// select the normalization method
#ifdef _WIN64 // 64bit
#define USES_llongNormalize 1
#else
#define USES_llongNormalize 0
#endif

#if USES_llongNormalize == 0
//When double > LONG_MAX the substitution "long = double" is not allowed.
static fType fftNormalize(fftType x, const fType radix, fftType *carry) {
    fftType r;
    x += ROUND_DBL_INT;
    r = fftFmod(x, (fftType)radix); // x = 123999456.999687001
//cout << "x = "<< x << " r = "<< r << endl;    // r = 9456.9996870011...
    *carry = (x-r)/radix; // carry = 12399.94569999, r = 9457.24968795478344
//  carry = 12399 f = 9457
    return (fType)(r + ROUND_DBL_INT);   // to integer
}
#endif
#if USES_LONG_DBL_FFT
fftType fftModfl(const fftType x, fftType *ip){// modfl() my version
    long long int ipl = x;
    *ip = ipl;
    return x-ipl;
}
#endif
////////////////////// main body /////////////////////////////
/// z = x*y ///
void SLong::LLMultFFT(const SLong& x, const SLong& y, SLong& z, uint N ){
    fftType *X, *Y, *w;
    int tblNo = howpow2(N), *ip;

    if(fftSize==0) F=new FFTWorkArea[FFT_MAX_SIZE_BITS+1];

    if(F[tblNo].size==0) F[tblNo].Allocate((int)N);
    fftSize = N;
    w = F[tblNo].w;     ip = F[tblNo].ip;
    if(Wx.size() == 0) z.fftUsedTimes = 0;
    if(Wx.size() < N){
        Wx.size(N, -1); // enlarge only
        Wy.size(N, -1);
    }
    X = Wx.Elements();  Y = Wy.Elements();
    uint i;
    uint hx = x.Head(), hy = y.Head();
#ifndef NDEBUG
assert(N <= z.FFTMaxArraySize());
assert(x.Type() & x.DEC_INT);
#endif
    if(hx+hy+2u > N){
        x.SetError(x.SYNTAX_ERR, "LLMultFFT", 217);
    }

    const fType* xv = x.ReadFigures();
    const fType* yv = y.ReadFigures();
// step 1,2,3 initialization, rdft() and make products.
    if(&x == &y)rdftSquare(xv, hx, X, N, ip, w);
    else        rdftProduct(xv, hx, yv, hy, X, Y, N, ip, w);

// step 4 inverse transform and division by N/2
    rdft(N, -1, X, ip, w);
    fftType recN2 = 2.0/(fftType)N;
    for(i = 0; i < N; i++) X[i] *= recN2;

//It checks that X[n] are very close to integer or not.
    if(SNManager::FFTVerify() == ON){
        fftType ipart;
        const fftType dmax=(fftType)fftLdexp(1.0, fftType_MANT_DIG); // 1.0*2^DBL_MANT_DIG(=53)
        const double err = ROUND_DBL_INT;
        for(i = 0; i < N; i++){
#if USES_LONG_DBL_FFT
            fftModfl(X[i]+ROUND_DBL_INT, &ipart);
#else
            fftModf(X[i]+ROUND_DBL_INT, &ipart);
#endif
            if((fftFabs(ipart - X[i]) >= err) || (ipart>dmax)){
                z.SetError(z.FFT_ERR, "round off", 217);
            }   
        }
    }   
//step 5 normalization in double and conversion to "fType" type
    uint zh = x.aHead + y.aHead +1u, zt = x.aTail + y.aTail;
//It allocates the memory.
    if(z.figure.size() < N){
        z.valloc(N, -1);
    }
    z.figure.clear(zh); //The part which is not substituted here is filled with zeros.
    fType* zv = z.figure.Elements();

//It cannot obtain a faster speed by treating the BRADIX as another processing.
#if USES_llongNormalize //defined _WIN64  // since version 2.30
// 64 bit integer
    long long int t, rdx = x.Radix();;
    for(i = 0; i < zh ; i++){
        X[i] += ROUND_DBL_INT;  //abc.99999 ==> abC.0???, ROUND_DBL_INT = 0.25
        if(X[i] < rdx){
            zv[i] = (fType)X[i];
            continue;
        }
        t = X[i];   // double --> long long integer
        zv[i] = t % rdx;    // remainder
        X[i+1] += t / rdx;  // carry
    }
#else
    fftType carry;
    for(i = 0; i < zh ; i++){
        zv[i] = fftNormalize(X[i], x.Radix(), &carry);
        X[i+1] += carry;
    }   
#endif

#ifndef NDEBUG
z.figure[i];    assert(z(i+1u) == 0);
#endif
// last figure  By the normalization it is possible that X[2*i] != 0.
    zv[i] = fType(X[i] + ROUND_DBL_INT);
    z.SetSign( x.Sign()*y.Sign() );
// step 6 It decides the figure positions.
    while(!zv[zh]) zh--;
    z.aHead = zh;
    while(!zv[zt]) zt++;
    z.aTail = zt;
#ifndef NDEBUG
assert(zt <= zh);
#endif
    z.fftUsedTimes++;
}
slmllfft.cpp : last modifiled at 2017/09/21 10:34:52(6,936 bytes)
created at 2017/10/07 10:26:48
The creation time of this html file is 2017/11/09 14:52:03 (Thu Nov 09 14:52:03 2017).